/******************************************************************************
* Load individual-level data, winsorize balance variables, write the
* summary-statistics table, and aggregate to the CZ level.
* Requires the geographic level to be set to individual before running.
*****************************************************************************/


* Read the comma-separated summary-statistics file produced by Spark.
* Row 1 of the file supplies the variable names; any data in memory is discarded.
import delimited "${dataRoot}/sumstats_${sumStatsyr}${sumStatsM}_${Geo}.csv", ///
	clear varnames(1) delimiters(",")

* List the imported variables in the log, then shorten stateabbr to state
describe
rename stateabbr state



* Call the cleanUpVars program (not defined in this section) to clean up
* variables and perform some calculations.
* NOTE(review): the meaning of the argument 1 is not visible here - confirm
* against the program definition.
cleanUpVars 1

* Attach variable labels via labelVars (not defined in this section).
* Per the original note: capitalize first letter, long label - presumably what
* the 0 and 1 flags select; the third argument is a LaTeX spacing string.
* TODO confirm argument order against the program definition.
labelVars 0 1 "\addlinespace[0.1em] \\ "

* Define North (upper midwest) vs South (deep south).
* This program generates the variable "NvS".
defineNvS


* Winsorize the upper tail of every variable whose name contains "bal", in place.
* For each match: print its summary statistics, winsorize the top 0.1% of
* observations (high side only), and overwrite the original values.
* The winsorized copy is held in a tempvar rather than a variable named <var>_w,
* so the loop cannot fail because <var>_w already exists in the data or because
* appending _w would exceed Stata's 32-character variable-name limit.
* winsor is a user-written command and must be installed.
foreach balVar of varlist *bal* {
	summarize `balVar'
	tempvar capped
	winsor `balVar', p(0.001) gen(`capped') highonly
	replace `balVar' = `capped'
	* drop explicitly so scratch columns do not pile up across iterations
	drop `capped'
}

* Table 1: summary statistics of the outcome variables listed in global keyTuVars.
* The detail option is requested so the percentiles reported below are available.
estpost summarize ${keyTuVars}, detail
estimates store sst

* Write the stored statistics as a LaTeX table: one row per variable, with
* mean, sd and selected percentiles as columns, N reported in the footer,
* and italic sub-headings inserted ahead of the first variable of each group.
* NOTE(review): the output file name has no month component, unlike the input
* file - confirm that tables for different months of one year may overwrite.
esttab sst using "$outDir/Tex/sumstats_var_${Geo}_${sumStatsyr}.tex", ///
	replace ///
	cells("mean(fmt(%9.1fc)) sd(fmt(%9.1fc)) p50(fmt(%9.0fc)) p75(fmt(%9.0fc)) p90(fmt(%9.0fc)) p95(fmt(%9.0fc)) p99(fmt(%9.0fc))") ///
	collabels("Mean" "Std. Dev." "Median" "Pct 75" "Pct 90" "Pct 95" "Pct 99") ///
	nonumber nomtitle noobs label ///
	scalars("N") sfmt(%20.0fc) ///
	refcat( ///
		unpdcoly3 " \addlinespace[0.1em] \\  \hspace{5mm} \emph{Collections - Flow}" ///
		unpdcol "\addlinespace[0.1em] \\ \addlinespace[0.1em] \\  \hspace{5mm} \emph{Collections - Stock}" ///
		ccdqy3   "\addlinespace[0.1em] \\ \addlinespace[0.1em] \\  \hspace{5mm} \emph{Credit Card}" ///
		bkrty3 "\addlinespace[0.1em] \\ \addlinespace[0.1em] \\  \hspace{5mm} \emph{Bankruptcy}" , nolabel) ///
	prefoot("\addlinespace[0.1em] \\ \hline")

	
* Aggregate to the CZ level: one row per asofdate and CZ (with its name and state
* identifiers), holding the mean of each keyTuVars variable and the total of numobs.
* NOTE(review): the means are unweighted - appropriate if every input row is one
* individual; confirm, or weight by numobs if rows can represent more than one.
collapse (mean) ${keyTuVars} (sum) numobs, ///
	by(asofdate cz czname statefip state)

* Save the CZ-level file next to the input, overwriting any earlier copy
export delimited using "${dataRoot}/sumstats_${sumStatsyr}${sumStatsM}_${Geo}toCZ.csv", replace
